samples <- read.table(file.path("Analysis/SraRunTable.txt"), sep = ",", header = TRUE) %>%
  dplyr::mutate(cell_line = ifelse(grepl("A673", x = source_name), "A673", "TC71")) %>%
  dplyr::mutate(condition = ifelse(grepl("WT|siCT", x = GENOTYPE), "Control", "Treatment"))

A673_samples <- samples %>%
  dplyr::filter(GENOTYPE %in% c("WT", "SA2 KO") & cell_line == "A673")
  
TC71_samples <- samples %>%
  dplyr::filter(GENOTYPE %in% c("SA2 KO", "WT") & cell_line == "TC71")

S2KO_samples <- samples %>%
  dplyr::filter(GENOTYPE == "SA2 KO")

A673_salmon_files <- file.path("Salmon/salmon.out", A673_samples$Run, "quant.sf") %>%
  setNames(object = , A673_samples$Run)

TC71_salmon_files <- file.path("Salmon/salmon.out", TC71_samples$Run, "quant.sf") %>%
  setNames(object = , TC71_samples$Run)

S2KO_salmon_files <- file.path("Salmon/salmon.out", S2KO_samples$Run, "quant.sf") %>%
  setNames(object = , S2KO_samples$Run)

ensdb <- EnsDb.Hsapiens.v86

transcripts <- transcripts(ensdb, columns = c(listColumns(ensdb, "tx"), "gene_name"),
  return.type = "data.frame") %>%
  as_tibble() %>%
  dplyr::select(tx_id, gene_name) 

A673_txi <- tximport(A673_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)

TC71_txi <- tximport(TC71_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)

S2KO_txi <- tximport(S2KO_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)

A673_dds_txi <- DESeqDataSetFromTximport(A673_txi, colData = A673_samples, design = ~condition)

TC71_dds_txi <- DESeqDataSetFromTximport(TC71_txi, colData = TC71_samples, design = ~condition)

S2KO_dds_txi <- DESeqDataSetFromTximport(S2KO_txi, colData = S2KO_samples, design = ~cell_line)

Exploratory Data

A673 PCA Plot

vst <- vst(A673_dds_txi)

A673_PCA <- plotPCA(vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
A673_percentVar <- round(100 * attr(A673_PCA, "percentVar"))

ggplot(A673_PCA, aes(PC1,PC2, color = GENOTYPE)) +
  geom_point(size = 3) +
  ggtitle("PCA Plot for A673 STAG2KO samples" ) +
  xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
  coord_fixed() 

TC71 PCA Plot

TC71_vst <- vst(TC71_dds_txi)

TC71_PCA <- plotPCA(TC71_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
TC71_percentVar <- round(100 * attr(TC71_PCA, "percentVar"))

ggplot(TC71_PCA, aes(PC1,PC2, color = GENOTYPE)) +
  geom_point(size = 3) +
  ggtitle("PCA Plot for TC71 STAG2KO samples" ) +
  xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
  coord_fixed() 

All STAG2KO Samples PCA Plot

S2KO_vst <- vst(S2KO_dds_txi)

S2KO_PCA <- plotPCA(S2KO_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
S2KO_percentVar <- round(100 * attr(S2KO_PCA, "percentVar"))

ggplot(S2KO_PCA, aes(PC1,PC2, color = cell_line)) +
  geom_point(size = 3) +
  ggtitle("PCA Plot for All STAG2KO samples" ) +
  xlab(paste0("PC1: ", S2KO_percentVar[1], "% variance")) +
  ylab(paste0("PC2: ", S2KO_percentVar[2], "% variance")) +
  coord_fixed() 

Sample Metadata

A673_metadata <- colData(A673_dds_txi)

TC71_metadata <- colData(TC71_dds_txi)

all_metadata <- rbind(A673_metadata,TC71_metadata)

all_metadata %>% 
  as.data.frame() %>%
  dplyr::select(GENOTYPE, cell_line) %>%
  kbl(caption = "Table 1: Sample Overview") %>%
  kable_styling(bootstrap_options = "striped", full_width = T, html_font = "Cambria")
Table 1: Sample Overview
GENOTYPE cell_line
SRR9326185 SA2 KO A673
SRR9326186 SA2 KO A673
SRR9326187 SA2 KO A673
SRR9326195 WT A673
SRR9326196 WT A673
SRR9326197 WT A673
SRR9326198 SA2 KO TC71
SRR9326199 SA2 KO TC71
SRR9326200 SA2 KO TC71
SRR9326201 SA2 KO TC71
SRR9326202 SA2 KO TC71
SRR9326203 SA2 KO TC71
SRR9326208 WT TC71
SRR9326209 WT TC71
SRR9326210 WT TC71

Results

Statistical Analysis

A673 Volcano Plot

EnhancedVolcano(A673_sig_ordered_result, 
                lab = A673_sig_ordered_result$Gene_name, 
                x = "log2FoldChange", 
                y = "pvalue", 
                title = "Siginifcant Genes for STAG2KO in A673 cells",
                subtitle = "",
                pointSize = 1.0, 
                labSize = 4.0,
                xlim = c(min(A673_sig_ordered_result$log2FoldChange), max(A673_sig_ordered_result$log2FoldChange)),
                ylim = c(0, 300)
                )

TC71 Volcano Plot

EnhancedVolcano(TC71_sig_ordered_result, 
                lab = TC71_sig_ordered_result$Gene_name, 
                x = "log2FoldChange", 
                y = "pvalue", 
                title =  "Siginifcant Genes for STAG2KO in TC71 Cells versus Control",
                subtitle = "",
                pointSize = 1.0, 
                labSize = 4.0,
                xlim = c(min(TC71_sig_ordered_result$log2FoldChange), max(TC71_sig_ordered_result$log2FoldChange)),
                ylim = c(0, 200)
                )

all STAG2KO samples Volcano Plot

EnhancedVolcano(S2KO_sig_ordered_result, 
                lab = S2KO_sig_ordered_result$Gene_name, 
                x = "log2FoldChange", 
                y = "pvalue", 
                title =  "Siginifcant Genes for between the STAG2KO samples in A673 and TC71 cell lines",
                subtitle = "",
                pointSize = 1.0, 
                labSize = 4.0,
                xlim = c(min(S2KO_sig_ordered_result$log2FoldChange), max(S2KO_sig_ordered_result$log2FoldChange)),
                ylim = c(0, 200)
                )

A672 Significant Genes Table

A673_table_result <- dplyr::select(A673_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)

datatable(A673_table_result, class = 'cell-border stripe', 
          caption = "Table 2: A672 STAG2KO Differentally Significant Genes")

TC71 Significant Genes Table

TC71_table_result <- dplyr::select(TC71_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)

datatable(TC71_table_result, class = 'cell-border stripe', 
          caption = "Table 3: TC71 STAG2KO Differentally Significant Genes")

all STAG2KO samples Significant Genes Table

S2KO_table_result <- dplyr::select(S2KO_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)

datatable(S2KO_table_result, class = 'cell-border stripe', 
          caption = "Table 4: STAG2KO samples Differentally Significant Genes")

Top 10 Overexpressed and 10 Underexpressed genes for STAG2KO in A673 cells

A673_heatmap <- pheatmap(A673_sig_norm_dds_counts,
                    main = "Top 10 Over- and Under- expressed A673 STAG2KO DEGs",
                    color = palette(200), 
                    cluster_rows = FALSE,
                    cluster_cols = FALSE,
                    show_rownames = TRUE, 
                    annotation = dplyr::select(A673_heat_meta, condition), 
                    scale = "row") 

Top 10 Overexpressed and 10 Underexpressed genes for STAG2KO in A673 cells

TC71_heatmap <- pheatmap(TC71_sig_norm_dds_counts,
                    main = "Top 10 Over- and Under- expressed TC71 STAG2KO DEGs",
                    color = palette(200), 
                    cluster_rows = FALSE,
                    cluster_cols = FALSE,
                    show_rownames = TRUE, 
                    annotation = dplyr::select(TC71_heat_meta, condition), 
                    scale = "row") 

Top 10 Overexpressed and 10 Underexpressed genes for STAG2KO in A673 cells

S2KO_heatmap <- pheatmap(S2KO_sig_norm_dds_counts,
                    main = "Top 10 Over- and Under- expressed A673 STAG2KO DEGs",
                    color = palette(200), 
                    cluster_rows = FALSE,
                    cluster_cols = FALSE,
                    show_rownames = TRUE, 
                    annotation = dplyr::select(S2KO_heat_meta, cell_line), 
                    scale = "row") 

GSEA Enrichment analysis for A673

A673_ordered_gse <- A673_gse %>%
  dplyr::arrange(desc(abs(NES)))

A673_ordered_gse_df <- A673_ordered_gse %>%
  as_tibble() %>%
  dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)

datatable(A673_ordered_gse_df, class = 'cell-border stripe', 
          caption = "Table 4: GSEA enrichment results for A673 STAG2KO significant DEGs")

Top Ranked enrichched pathway for A673 STAG2KO vs Control

gseaplot2(A673_ordered_gse, geneSetID = 1, title = A673_ordered_gse$Description[1])

GSEA Enrichment analysis for TC71

TC71_ordered_gse <- TC71_gse %>%
  dplyr::arrange(desc(abs(NES)))

TC71_ordered_gse_df <- TC71_ordered_gse %>%
  as_tibble() %>%
  dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)

datatable(TC71_ordered_gse_df, class = 'cell-border stripe', 
          caption = "Table 5: GSEA enrichment results for TC71 STAG2KO significant DEGs")

Top Ranked enrichched pathway for TC71 STAG2KO vs Control

gseaplot2(TC71_ordered_gse, geneSetID = 1, title = TC71_ordered_gse$Description[1])

GSEA Enrichment analysis for all STAG2KO samples

S2KO_ordered_gse <- S2KO_gse %>%
  dplyr::arrange(desc(abs(NES)))

S2KO_ordered_gse_df <- S2KO_ordered_gse %>%
  as_tibble() %>%
  dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)

datatable(S2KO_ordered_gse_df, class = 'cell-border stripe', 
          caption = "Table 5: GSEA enrichment results for all STAG2KO samples significant DEGs")

Top Ranked enrichched pathway for all STAG2KO samples

gseaplot2(S2KO_ordered_gse, geneSetID = 1, title = S2KO_ordered_gse$Description[1])

A673 enrichr Pathway enrichment

A673_resRmd <- llply(names(A673_gene_list), function(groupNow) {
  genesNow <- A673_gene_list[[groupNow]]
  response <- httr::POST(  
    url = 'https://maayanlab.cloud/Enrichr/addList', 
    body = list(
      'list' = paste0(genesNow, collapse = "\n"),
      'description' = groupNow
      )
    )
  response <- jsonlite::fromJSON(httr::content(response, as = "text"))  
  permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", 
                      response$shortId[1])
  knitr::knit_child(text = c( 
    '### `r groupNow`',
    '',
    'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
    ''
  ), 
  envir = environment(),  
  quiet = TRUE)
})
cat(unlist(A673_resRmd), sep = '\n')

Over-expressed

Enrichr Link: Over-expressed.

Under-expressed

Enrichr Link: Under-expressed.

TC71 enrichr Pathway enrichment

TC71_resRmd <- llply(names(TC71_gene_list), function(groupNow) {
  genesNow <- TC71_gene_list[[groupNow]]
  response <- httr::POST(  
    url = 'https://maayanlab.cloud/Enrichr/addList', 
    body = list(
      'list' = paste0(genesNow, collapse = "\n"),
      'description' = groupNow
      )
    )
  response <- jsonlite::fromJSON(httr::content(response, as = "text"))  
  permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", 
                      response$shortId[1])
  knitr::knit_child(text = c( 
    '### `r groupNow`',
    '',
    'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
    ''
  ), 
  envir = environment(),  
  quiet = TRUE)
})
cat(unlist(TC71_resRmd), sep = '\n')

Over-expressed

Enrichr Link: Over-expressed.

Under-expressed

Enrichr Link: Under-expressed.

all STAG2KO samples enrichr Pathway enrichment

S2KO_resRmd <- llply(names(S2KO_gene_list), function(groupNow) {
  genesNow <- S2KO_gene_list[[groupNow]]
  response <- httr::POST(  
    url = 'https://maayanlab.cloud/Enrichr/addList', 
    body = list(
      'list' = paste0(genesNow, collapse = "\n"),
      'description' = groupNow
      )
    )
  response <- jsonlite::fromJSON(httr::content(response, as = "text"))  
  permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=", 
                      response$shortId[1])
  knitr::knit_child(text = c( 
    '### `r groupNow`',
    '',
    'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
    ''
  ), 
  envir = environment(),  
  quiet = TRUE)
})
cat(unlist(S2KO_resRmd), sep = '\n')

Over-expressed

Enrichr Link: Over-expressed.

Under-expressed

Enrichr Link: Under-expressed.